import re
import sys
import os
import random
import csv

def write_file(writer_object, mylist, fulllist):
    """Write the selected raw CSV lines through a csv writer.

    Args:
        writer_object: Object exposing ``writerow`` (e.g. a ``csv.writer``).
        mylist: Iterable of integer indices into ``fulllist``; rows are
            written in this order.
        fulllist: Sequence of raw text lines, one CSV record per line
            (line terminators may still be attached).

    The writer object, the index list and every parsed row are echoed to
    stdout as progress output.
    """
    print(writer_object)
    print(mylist)
    for element in mylist:
        # Parse with the csv module instead of str.split(','): quoted fields
        # that contain commas stay intact, and the line terminator is dropped
        # from every field, including the first (and only) column.
        # A blank line parses to an empty row.
        entry = next(csv.reader([fulllist[element]]), [])
        writer_object.writerow(entry)
        print(entry)
    

# Script body: randomly split a CSV file into train / test / validation files.
#
# Usage: randomSplit.py csvfile perTrain perTest perValidate
#   perTrain and perTest are percentages of the data rows; every row left
#   over goes to the validation file (perValidate is required on the command
#   line but is not read). Line 0 of the input is treated as the header and is
#   written first to every output file.
if len(sys.argv) != 5:
    print("Error, Usage: randomSplit.py csvfile perTrain perTest perValidate")
    sys.exit(1)

csv_path = sys.argv[1]

# Plain 'r' already gives universal-newline handling in text mode; the old
# 'rU' mode raises ValueError on Python 3.11+.
with open(csv_path, 'r') as f:
    infile = list(f)

num_lines = len(infile)
if num_lines == 0:
    # Without a header line there is nothing to copy into the outputs.
    print("Error, input file is empty: " + csv_path)
    sys.exit(1)

# Indices of the data rows only (index 0 is the header), in random order.
shuffled = list(range(1, num_lines))
random.shuffle(shuffled)
print(shuffled)

# Boundaries are computed on the number of data rows, so the percentages are
# not skewed by the header line. Multiplying before dividing keeps exact
# results exact (e.g. 29% of 100 rows is 29, not 28.999...).
num_rows = len(shuffled)
per_train = float(sys.argv[2])
per_test = float(sys.argv[3])
start_test = per_train * num_rows / 100
start_val = (per_train + per_test) * num_rows / 100
print(start_val)
train = [0] + shuffled[:int(start_test)]
test = [0] + shuffled[int(start_test):int(start_val)]
val = [0] + shuffled[int(start_val):]

print(train)
print(test)
print(val)

# Derive output names from the path's extension rather than a regex
# substitution: an input without ".csv" in its name used to map every output
# onto the input file itself and truncate it.
base, ext = os.path.splitext(csv_path)
ext = ext or '.csv'

with open(base + '_test' + ext, 'w') as test_name, \
        open(base + '_train' + ext, 'w') as train_name, \
        open(base + '_val' + ext, 'w') as val_name:
    test_file = csv.writer(test_name, lineterminator='\n', delimiter=',')
    train_file = csv.writer(train_name, lineterminator='\n', delimiter=',')
    val_file = csv.writer(val_name, lineterminator='\n', delimiter=',')

    write_file(test_file, test, infile)
    write_file(train_file, train, infile)
    write_file(val_file, val, infile)




